{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Stop</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Price</th>\n",
       "      <th>Type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:43:24</td>\n",
       "      <td>11号线祁连山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:22:04</td>\n",
       "      <td>11号线枫桥路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:21:18</td>\n",
       "      <td>2号线娄山关路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:07:31</td>\n",
       "      <td>2号线娄山关路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:08:42</td>\n",
       "      <td>11号线祁连山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          ID        Date      Time      Stop Mode  Price Type\n",
       "0  100405844  2015-04-01  12:43:24  11号线祁连山路   地铁    4.0  非优惠\n",
       "1  100405844  2015-04-01  18:22:04   11号线枫桥路   地铁    3.0  非优惠\n",
       "2  100405844  2015-04-01  08:21:18   2号线娄山关路   地铁    3.0  非优惠\n",
       "3  100405844  2015-04-01  12:07:31   2号线娄山关路   地铁    0.0  非优惠\n",
       "4  100405844  2015-04-01  18:08:42  11号线祁连山路   地铁    0.0  非优惠"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IC_data = pd.read_csv(r'data-sample\\SPTCC-20150401-sample.csv',header=None)\n",
    "IC_data.columns = ['ID','Date','Time','Stop','Mode','Price','Type']\n",
    "IC_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 提取地铁数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Stop</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Price</th>\n",
       "      <th>Type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:43:24</td>\n",
       "      <td>11号线祁连山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:22:04</td>\n",
       "      <td>11号线枫桥路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:21:18</td>\n",
       "      <td>2号线娄山关路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:07:31</td>\n",
       "      <td>2号线娄山关路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:08:42</td>\n",
       "      <td>11号线祁连山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442855</th>\n",
       "      <td>2301108266</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:48:21</td>\n",
       "      <td>2号线中山公园</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442856</th>\n",
       "      <td>2301108266</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:06:45</td>\n",
       "      <td>2号线虹桥火车站</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442857</th>\n",
       "      <td>2104162123</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:48:07</td>\n",
       "      <td>2号线中山公园</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442858</th>\n",
       "      <td>2104162123</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:18:16</td>\n",
       "      <td>2号线虹桥火车站</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442859</th>\n",
       "      <td>3103885141</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:16:19</td>\n",
       "      <td>2号线虹桥火车站</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>360873 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                ID        Date      Time      Stop Mode  Price Type\n",
       "0        100405844  2015-04-01  12:43:24  11号线祁连山路   地铁    4.0  非优惠\n",
       "1        100405844  2015-04-01  18:22:04   11号线枫桥路   地铁    3.0  非优惠\n",
       "2        100405844  2015-04-01  08:21:18   2号线娄山关路   地铁    3.0  非优惠\n",
       "3        100405844  2015-04-01  12:07:31   2号线娄山关路   地铁    0.0  非优惠\n",
       "4        100405844  2015-04-01  18:08:42  11号线祁连山路   地铁    0.0  非优惠\n",
       "...            ...         ...       ...       ...  ...    ...  ...\n",
       "442855  2301108266  2015-04-01  23:48:21   2号线中山公园   地铁    4.0  非优惠\n",
       "442856  2301108266  2015-04-01  23:06:45  2号线虹桥火车站   地铁    0.0  非优惠\n",
       "442857  2104162123  2015-04-01  23:48:07   2号线中山公园   地铁    4.0  非优惠\n",
       "442858  2104162123  2015-04-01  23:18:16  2号线虹桥火车站   地铁    0.0  非优惠\n",
       "442859  3103885141  2015-04-01  23:16:19  2号线虹桥火车站   地铁    0.0  非优惠\n",
       "\n",
       "[360873 rows x 7 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IC_data = IC_data[IC_data['Mode']=='地铁']\n",
    "IC_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'11号线'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r = IC_data['Stop'][0]\n",
    "r[0:r.find('线')+1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'祁连山路'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r[r.find('线')+1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Stop</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Price</th>\n",
       "      <th>Type</th>\n",
       "      <th>Line</th>\n",
       "      <th>Station</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:43:24</td>\n",
       "      <td>11号线祁连山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>11号线</td>\n",
       "      <td>祁连山路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:22:04</td>\n",
       "      <td>11号线枫桥路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>11号线</td>\n",
       "      <td>枫桥路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:21:18</td>\n",
       "      <td>2号线娄山关路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>娄山关路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:07:31</td>\n",
       "      <td>2号线娄山关路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>娄山关路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100405844</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:08:42</td>\n",
       "      <td>11号线祁连山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>11号线</td>\n",
       "      <td>祁连山路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442855</th>\n",
       "      <td>2301108266</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:48:21</td>\n",
       "      <td>2号线中山公园</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>中山公园</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442856</th>\n",
       "      <td>2301108266</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:06:45</td>\n",
       "      <td>2号线虹桥火车站</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>虹桥火车站</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442857</th>\n",
       "      <td>2104162123</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:48:07</td>\n",
       "      <td>2号线中山公园</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>中山公园</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442858</th>\n",
       "      <td>2104162123</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:18:16</td>\n",
       "      <td>2号线虹桥火车站</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>虹桥火车站</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442859</th>\n",
       "      <td>3103885141</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>23:16:19</td>\n",
       "      <td>2号线虹桥火车站</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>虹桥火车站</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>360873 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                ID        Date      Time      Stop Mode  Price Type  Line  \\\n",
       "0        100405844  2015-04-01  12:43:24  11号线祁连山路   地铁    4.0  非优惠  11号线   \n",
       "1        100405844  2015-04-01  18:22:04   11号线枫桥路   地铁    3.0  非优惠  11号线   \n",
       "2        100405844  2015-04-01  08:21:18   2号线娄山关路   地铁    3.0  非优惠   2号线   \n",
       "3        100405844  2015-04-01  12:07:31   2号线娄山关路   地铁    0.0  非优惠   2号线   \n",
       "4        100405844  2015-04-01  18:08:42  11号线祁连山路   地铁    0.0  非优惠  11号线   \n",
       "...            ...         ...       ...       ...  ...    ...  ...   ...   \n",
       "442855  2301108266  2015-04-01  23:48:21   2号线中山公园   地铁    4.0  非优惠   2号线   \n",
       "442856  2301108266  2015-04-01  23:06:45  2号线虹桥火车站   地铁    0.0  非优惠   2号线   \n",
       "442857  2104162123  2015-04-01  23:48:07   2号线中山公园   地铁    4.0  非优惠   2号线   \n",
       "442858  2104162123  2015-04-01  23:18:16  2号线虹桥火车站   地铁    0.0  非优惠   2号线   \n",
       "442859  3103885141  2015-04-01  23:16:19  2号线虹桥火车站   地铁    0.0  非优惠   2号线   \n",
       "\n",
       "       Station  \n",
       "0         祁连山路  \n",
       "1          枫桥路  \n",
       "2         娄山关路  \n",
       "3         娄山关路  \n",
       "4         祁连山路  \n",
       "...        ...  \n",
       "442855    中山公园  \n",
       "442856   虹桥火车站  \n",
       "442857    中山公园  \n",
       "442858   虹桥火车站  \n",
       "442859   虹桥火车站  \n",
       "\n",
       "[360873 rows x 9 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IC_data['Line'] = IC_data['Stop'].apply(lambda r :r[:r.find('线')+1])\n",
    "IC_data['Station'] = IC_data['Stop'].apply(lambda r :r[r.find('线')+1:])\n",
    "IC_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 提取OD信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Stop</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Price</th>\n",
       "      <th>Type</th>\n",
       "      <th>Line</th>\n",
       "      <th>Station</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>334427</th>\n",
       "      <td>82024</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:25</td>\n",
       "      <td>8号线西藏北路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>8号线</td>\n",
       "      <td>西藏北路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334426</th>\n",
       "      <td>82024</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>09:07:29</td>\n",
       "      <td>10号线江湾体育场</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>10号线</td>\n",
       "      <td>江湾体育场</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>381022</th>\n",
       "      <td>88957</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>15:46:58</td>\n",
       "      <td>3号线宜山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>3号线</td>\n",
       "      <td>宜山路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>381021</th>\n",
       "      <td>88957</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>16:20:58</td>\n",
       "      <td>3号线东宝兴路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>3号线</td>\n",
       "      <td>东宝兴路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>376286</th>\n",
       "      <td>99313</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:26:26</td>\n",
       "      <td>8号线延吉中路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>8号线</td>\n",
       "      <td>延吉中路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>218391</th>\n",
       "      <td>4000039067</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>07:32:48</td>\n",
       "      <td>5号线颛桥</td>\n",
       "      <td>地铁</td>\n",
       "      <td>1.0</td>\n",
       "      <td>优惠</td>\n",
       "      <td>5号线</td>\n",
       "      <td>颛桥</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373585</th>\n",
       "      <td>4000039743</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:11:15</td>\n",
       "      <td>2号线南京西路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>南京西路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373584</th>\n",
       "      <td>4000039743</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:28:44</td>\n",
       "      <td>10号线交通大学</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>10号线</td>\n",
       "      <td>交通大学</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171701</th>\n",
       "      <td>4000040665</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:34</td>\n",
       "      <td>4号线上海体育场</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>4号线</td>\n",
       "      <td>上海体育场</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171702</th>\n",
       "      <td>4000040665</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>09:16:55</td>\n",
       "      <td>10号线南京东路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>10号线</td>\n",
       "      <td>南京东路</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>360873 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                ID        Date      Time       Stop Mode  Price Type  Line  \\\n",
       "334427       82024  2015-04-01  08:48:25    8号线西藏北路   地铁    0.0  非优惠   8号线   \n",
       "334426       82024  2015-04-01  09:07:29  10号线江湾体育场   地铁    4.0  非优惠  10号线   \n",
       "381022       88957  2015-04-01  15:46:58     3号线宜山路   地铁    0.0  非优惠   3号线   \n",
       "381021       88957  2015-04-01  16:20:58    3号线东宝兴路   地铁    4.0  非优惠   3号线   \n",
       "376286       99313  2015-04-01  08:26:26    8号线延吉中路   地铁    0.0  非优惠   8号线   \n",
       "...            ...         ...       ...        ...  ...    ...  ...   ...   \n",
       "218391  4000039067  2015-04-01  07:32:48      5号线颛桥   地铁    1.0   优惠   5号线   \n",
       "373585  4000039743  2015-04-01  18:11:15    2号线南京西路   地铁    0.0  非优惠   2号线   \n",
       "373584  4000039743  2015-04-01  18:28:44   10号线交通大学   地铁    3.0  非优惠  10号线   \n",
       "171701  4000040665  2015-04-01  08:48:34   4号线上海体育场   地铁    0.0  非优惠   4号线   \n",
       "171702  4000040665  2015-04-01  09:16:55   10号线南京东路   地铁    4.0  非优惠  10号线   \n",
       "\n",
       "       Station  \n",
       "334427    西藏北路  \n",
       "334426   江湾体育场  \n",
       "381022     宜山路  \n",
       "381021    东宝兴路  \n",
       "376286    延吉中路  \n",
       "...        ...  \n",
       "218391      颛桥  \n",
       "373585    南京西路  \n",
       "373584    交通大学  \n",
       "171701   上海体育场  \n",
       "171702    南京东路  \n",
       "\n",
       "[360873 rows x 9 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IC_data = IC_data.sort_values(['ID','Date','Time'])\n",
    "IC_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Stop</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Price</th>\n",
       "      <th>Type</th>\n",
       "      <th>Line</th>\n",
       "      <th>Station</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>334427</th>\n",
       "      <td>8.202400e+04</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>09:07:29</td>\n",
       "      <td>10号线江湾体育场</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>10号线</td>\n",
       "      <td>江湾体育场</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334426</th>\n",
       "      <td>8.895700e+04</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>15:46:58</td>\n",
       "      <td>3号线宜山路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>3号线</td>\n",
       "      <td>宜山路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>381022</th>\n",
       "      <td>8.895700e+04</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>16:20:58</td>\n",
       "      <td>3号线东宝兴路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>3号线</td>\n",
       "      <td>东宝兴路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>381021</th>\n",
       "      <td>9.931300e+04</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:26:26</td>\n",
       "      <td>8号线延吉中路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>8号线</td>\n",
       "      <td>延吉中路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>376286</th>\n",
       "      <td>9.931300e+04</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>09:07:27</td>\n",
       "      <td>9号线打浦桥</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>9号线</td>\n",
       "      <td>打浦桥</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>218391</th>\n",
       "      <td>4.000040e+09</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:11:15</td>\n",
       "      <td>2号线南京西路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>2号线</td>\n",
       "      <td>南京西路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373585</th>\n",
       "      <td>4.000040e+09</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:28:44</td>\n",
       "      <td>10号线交通大学</td>\n",
       "      <td>地铁</td>\n",
       "      <td>3.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>10号线</td>\n",
       "      <td>交通大学</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373584</th>\n",
       "      <td>4.000041e+09</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:34</td>\n",
       "      <td>4号线上海体育场</td>\n",
       "      <td>地铁</td>\n",
       "      <td>0.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>4号线</td>\n",
       "      <td>上海体育场</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171701</th>\n",
       "      <td>4.000041e+09</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>09:16:55</td>\n",
       "      <td>10号线南京东路</td>\n",
       "      <td>地铁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>非优惠</td>\n",
       "      <td>10号线</td>\n",
       "      <td>南京东路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171702</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>360873 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  ID        Date      Time       Stop Mode  Price Type  Line  \\\n",
       "334427  8.202400e+04  2015-04-01  09:07:29  10号线江湾体育场   地铁    4.0  非优惠  10号线   \n",
       "334426  8.895700e+04  2015-04-01  15:46:58     3号线宜山路   地铁    0.0  非优惠   3号线   \n",
       "381022  8.895700e+04  2015-04-01  16:20:58    3号线东宝兴路   地铁    4.0  非优惠   3号线   \n",
       "381021  9.931300e+04  2015-04-01  08:26:26    8号线延吉中路   地铁    0.0  非优惠   8号线   \n",
       "376286  9.931300e+04  2015-04-01  09:07:27     9号线打浦桥   地铁    4.0  非优惠   9号线   \n",
       "...              ...         ...       ...        ...  ...    ...  ...   ...   \n",
       "218391  4.000040e+09  2015-04-01  18:11:15    2号线南京西路   地铁    0.0  非优惠   2号线   \n",
       "373585  4.000040e+09  2015-04-01  18:28:44   10号线交通大学   地铁    3.0  非优惠  10号线   \n",
       "373584  4.000041e+09  2015-04-01  08:48:34   4号线上海体育场   地铁    0.0  非优惠   4号线   \n",
       "171701  4.000041e+09  2015-04-01  09:16:55   10号线南京东路   地铁    4.0  非优惠  10号线   \n",
       "171702           NaN         NaN       NaN        NaN  NaN    NaN  NaN   NaN   \n",
       "\n",
       "       Station  \n",
       "334427   江湾体育场  \n",
       "334426     宜山路  \n",
       "381022    东宝兴路  \n",
       "381021    延吉中路  \n",
       "376286     打浦桥  \n",
       "...        ...  \n",
       "218391    南京西路  \n",
       "373585    交通大学  \n",
       "373584   上海体育场  \n",
       "171701    南京东路  \n",
       "171702     NaN  \n",
       "\n",
       "[360873 rows x 9 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IC_data.shift(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Slin</th>\n",
       "      <th>Sstation</th>\n",
       "      <th>Eline</th>\n",
       "      <th>Estation</th>\n",
       "      <th>Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>82024</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:25</td>\n",
       "      <td>8号线</td>\n",
       "      <td>西藏北路</td>\n",
       "      <td>10号线</td>\n",
       "      <td>江湾体育场</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>88957</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>15:46:58</td>\n",
       "      <td>3号线</td>\n",
       "      <td>宜山路</td>\n",
       "      <td>3号线</td>\n",
       "      <td>东宝兴路</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99313</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:26:26</td>\n",
       "      <td>8号线</td>\n",
       "      <td>延吉中路</td>\n",
       "      <td>9号线</td>\n",
       "      <td>打浦桥</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>99313</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:09:51</td>\n",
       "      <td>9号线</td>\n",
       "      <td>打浦桥</td>\n",
       "      <td>8号线</td>\n",
       "      <td>延吉中路</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>116435</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:24:49</td>\n",
       "      <td>4号线</td>\n",
       "      <td>大连路</td>\n",
       "      <td>3号线</td>\n",
       "      <td>中山公园</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360863</th>\n",
       "      <td>4000038121</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:19:28</td>\n",
       "      <td>13号线</td>\n",
       "      <td>金运路</td>\n",
       "      <td>1号线</td>\n",
       "      <td>人民广场</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360865</th>\n",
       "      <td>4000038121</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>21:32:28</td>\n",
       "      <td>1号线</td>\n",
       "      <td>人民广场</td>\n",
       "      <td>13号线</td>\n",
       "      <td>金运路</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360867</th>\n",
       "      <td>4000039067</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>07:22:58</td>\n",
       "      <td>5号线</td>\n",
       "      <td>剑川路</td>\n",
       "      <td>5号线</td>\n",
       "      <td>颛桥</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360869</th>\n",
       "      <td>4000039743</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:11:15</td>\n",
       "      <td>2号线</td>\n",
       "      <td>南京西路</td>\n",
       "      <td>10号线</td>\n",
       "      <td>交通大学</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360871</th>\n",
       "      <td>4000040665</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:34</td>\n",
       "      <td>4号线</td>\n",
       "      <td>上海体育场</td>\n",
       "      <td>10号线</td>\n",
       "      <td>南京东路</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>178946 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                ID        Date      Time  Slin Sstation Eline Estation  Price\n",
       "0            82024  2015-04-01  08:48:25   8号线     西藏北路  10号线    江湾体育场    4.0\n",
       "2            88957  2015-04-01  15:46:58   3号线      宜山路   3号线     东宝兴路    4.0\n",
       "4            99313  2015-04-01  08:26:26   8号线     延吉中路   9号线      打浦桥    4.0\n",
       "6            99313  2015-04-01  18:09:51   9号线      打浦桥   8号线     延吉中路    4.0\n",
       "8           116435  2015-04-01  12:24:49   4号线      大连路   3号线     中山公园    3.0\n",
       "...            ...         ...       ...   ...      ...   ...      ...    ...\n",
       "360863  4000038121  2015-04-01  08:19:28  13号线      金运路   1号线     人民广场    3.0\n",
       "360865  4000038121  2015-04-01  21:32:28   1号线     人民广场  13号线      金运路    4.0\n",
       "360867  4000039067  2015-04-01  07:22:58   5号线      剑川路   5号线       颛桥    1.0\n",
       "360869  4000039743  2015-04-01  18:11:15   2号线     南京西路  10号线     交通大学    3.0\n",
       "360871  4000040665  2015-04-01  08:48:34   4号线    上海体育场  10号线     南京东路    4.0\n",
       "\n",
       "[178946 rows x 8 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tem1 = IC_data.reset_index()\n",
    "tem2 = IC_data.shift(-1).reset_index()\n",
    "IC_OD = pd.merge(tem1,tem2,how='left',on='index')\n",
    "IC_OD = IC_OD[(IC_OD['Price_x'] == 0 )& (IC_OD['Price_y'] > 0 )]\n",
    "IC_OD = IC_OD[['ID_x','Date_x','Time_x','Line_x','Station_x','Line_y','Station_y','Price_y']]\n",
    "IC_OD.columns = ['ID','Date','Time','Slin','Sstation','Eline','Estation','Price']\n",
    "IC_OD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 数据存储\n",
    "#IC_OD.to_csv(r'data-sample\\IC_OD.csv',index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### OD集计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'08'"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r = IC_OD['Time'][0]\n",
    "r.split(':')[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Date</th>\n",
       "      <th>Time</th>\n",
       "      <th>Slin</th>\n",
       "      <th>Sstation</th>\n",
       "      <th>Eline</th>\n",
       "      <th>Estation</th>\n",
       "      <th>Price</th>\n",
       "      <th>Hour</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>82024</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:25</td>\n",
       "      <td>8号线</td>\n",
       "      <td>西藏北路</td>\n",
       "      <td>10号线</td>\n",
       "      <td>江湾体育场</td>\n",
       "      <td>4.0</td>\n",
       "      <td>08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>88957</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>15:46:58</td>\n",
       "      <td>3号线</td>\n",
       "      <td>宜山路</td>\n",
       "      <td>3号线</td>\n",
       "      <td>东宝兴路</td>\n",
       "      <td>4.0</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99313</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:26:26</td>\n",
       "      <td>8号线</td>\n",
       "      <td>延吉中路</td>\n",
       "      <td>9号线</td>\n",
       "      <td>打浦桥</td>\n",
       "      <td>4.0</td>\n",
       "      <td>08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>99313</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:09:51</td>\n",
       "      <td>9号线</td>\n",
       "      <td>打浦桥</td>\n",
       "      <td>8号线</td>\n",
       "      <td>延吉中路</td>\n",
       "      <td>4.0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>116435</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>12:24:49</td>\n",
       "      <td>4号线</td>\n",
       "      <td>大连路</td>\n",
       "      <td>3号线</td>\n",
       "      <td>中山公园</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360863</th>\n",
       "      <td>4000038121</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:19:28</td>\n",
       "      <td>13号线</td>\n",
       "      <td>金运路</td>\n",
       "      <td>1号线</td>\n",
       "      <td>人民广场</td>\n",
       "      <td>3.0</td>\n",
       "      <td>08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360865</th>\n",
       "      <td>4000038121</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>21:32:28</td>\n",
       "      <td>1号线</td>\n",
       "      <td>人民广场</td>\n",
       "      <td>13号线</td>\n",
       "      <td>金运路</td>\n",
       "      <td>4.0</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360867</th>\n",
       "      <td>4000039067</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>07:22:58</td>\n",
       "      <td>5号线</td>\n",
       "      <td>剑川路</td>\n",
       "      <td>5号线</td>\n",
       "      <td>颛桥</td>\n",
       "      <td>1.0</td>\n",
       "      <td>07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360869</th>\n",
       "      <td>4000039743</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>18:11:15</td>\n",
       "      <td>2号线</td>\n",
       "      <td>南京西路</td>\n",
       "      <td>10号线</td>\n",
       "      <td>交通大学</td>\n",
       "      <td>3.0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360871</th>\n",
       "      <td>4000040665</td>\n",
       "      <td>2015-04-01</td>\n",
       "      <td>08:48:34</td>\n",
       "      <td>4号线</td>\n",
       "      <td>上海体育场</td>\n",
       "      <td>10号线</td>\n",
       "      <td>南京东路</td>\n",
       "      <td>4.0</td>\n",
       "      <td>08</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>178946 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                ID        Date      Time  Slin Sstation Eline Estation  Price  \\\n",
       "0            82024  2015-04-01  08:48:25   8号线     西藏北路  10号线    江湾体育场    4.0   \n",
       "2            88957  2015-04-01  15:46:58   3号线      宜山路   3号线     东宝兴路    4.0   \n",
       "4            99313  2015-04-01  08:26:26   8号线     延吉中路   9号线      打浦桥    4.0   \n",
       "6            99313  2015-04-01  18:09:51   9号线      打浦桥   8号线     延吉中路    4.0   \n",
       "8           116435  2015-04-01  12:24:49   4号线      大连路   3号线     中山公园    3.0   \n",
       "...            ...         ...       ...   ...      ...   ...      ...    ...   \n",
       "360863  4000038121  2015-04-01  08:19:28  13号线      金运路   1号线     人民广场    3.0   \n",
       "360865  4000038121  2015-04-01  21:32:28   1号线     人民广场  13号线      金运路    4.0   \n",
       "360867  4000039067  2015-04-01  07:22:58   5号线      剑川路   5号线       颛桥    1.0   \n",
       "360869  4000039743  2015-04-01  18:11:15   2号线     南京西路  10号线     交通大学    3.0   \n",
       "360871  4000040665  2015-04-01  08:48:34   4号线    上海体育场  10号线     南京东路    4.0   \n",
       "\n",
       "       Hour  \n",
       "0        08  \n",
       "2        15  \n",
       "4        08  \n",
       "6        18  \n",
       "8        12  \n",
       "...     ...  \n",
       "360863   08  \n",
       "360865   21  \n",
       "360867   07  \n",
       "360869   18  \n",
       "360871   08  \n",
       "\n",
       "[178946 rows x 9 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IC_OD['Hour'] = IC_OD['Time'].apply(lambda r: r.split(':')[0])\n",
    "IC_OD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hour</th>\n",
       "      <th>Sstation</th>\n",
       "      <th>Estation</th>\n",
       "      <th>Count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>04</td>\n",
       "      <td>上海南站</td>\n",
       "      <td>翔殷路</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>04</td>\n",
       "      <td>常熟路</td>\n",
       "      <td>新闸路</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>04</td>\n",
       "      <td>漕宝路</td>\n",
       "      <td>汶水路</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>05</td>\n",
       "      <td>七宝</td>\n",
       "      <td>打浦桥</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>05</td>\n",
       "      <td>七宝</td>\n",
       "      <td>新村路</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117941</th>\n",
       "      <td>23</td>\n",
       "      <td>静安寺</td>\n",
       "      <td>中山公园</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117942</th>\n",
       "      <td>23</td>\n",
       "      <td>静安寺</td>\n",
       "      <td>娄山关路</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117943</th>\n",
       "      <td>23</td>\n",
       "      <td>马陆</td>\n",
       "      <td>马陆</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117944</th>\n",
       "      <td>23</td>\n",
       "      <td>黄陂南路</td>\n",
       "      <td>外环路</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117945</th>\n",
       "      <td>23</td>\n",
       "      <td>龙华中路</td>\n",
       "      <td>耀华路</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>117946 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Hour Sstation Estation  Count\n",
       "0        04     上海南站      翔殷路      2\n",
       "1        04      常熟路      新闸路      1\n",
       "2        04      漕宝路      汶水路      1\n",
       "3        05       七宝      打浦桥      1\n",
       "4        05       七宝      新村路      1\n",
       "...     ...      ...      ...    ...\n",
       "117941   23      静安寺     中山公园      1\n",
       "117942   23      静安寺     娄山关路      1\n",
       "117943   23       马陆       马陆      1\n",
       "117944   23     黄陂南路      外环路      1\n",
       "117945   23     龙华中路      耀华路      1\n",
       "\n",
       "[117946 rows x 4 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "OD_Count = IC_OD.groupby(['Hour','Sstation','Estation'])['ID'].count().rename('Count').reset_index()\n",
    "OD_Count"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "4ce0e62306dd6a5716965d4519ada776f947e6dfc145b604b11307c10277ef29"
  },
  "kernelspec": {
   "display_name": "Python 3.8.9 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
